### Descriptive information ----
# Tweets per user: printed to the console, then stored as a data frame.
# Most tweeted user has 146 tweets, least tweeted user has 1 tweet.
table(taiwan_2$screen_name)
user_name <- data.frame(table(taiwan_2$screen_name))

# Parse `created_at` (read as "YYYY-MM-DD HH:MM:SS") into UTC date-times
taiwan_2$timestamp <- as.POSIXct(
  taiwan_2$created_at,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "UTC"
)
visit_date <- as.Date("2022-08-02")

# Label each tweet "Before" or "After" Pelosi's visit; tweets posted on the
# visit date itself are labelled "After"
tweets <- mutate(
  taiwan_2,
  date = as.Date(timestamp),
  period = ifelse(date < visit_date, "Before", "After")
)
table(tweets$period)

# Number of tweets per calendar day
tweets_date <- data.frame(table(tweets$date))
###
library(openxlsx)
# Save the per-day tweet counts as an Excel workbook.
# There are 2109 tweets before Pelosi's visit, 16534 tweets on the day she
# came, and 64436 tweets after the visit.
write.xlsx(x = tweets_date, file = "tweets_date_pelosi.xlsx")
# Install required packages, but only those that are not already installed
# (an unconditional install.packages() call would re-download every package
# on each run of the script)
required_packages <- c("tidyverse", "tidytext", "syuzhet", "lubridate", "ggplot2")
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Load necessary libraries
library(tidyverse)
library(tidytext)
library(syuzhet)
library(lubridate)
library(ggplot2)

# Read the raw tweets from disk; change the path to the actual file location.
# NOTE(review): `tweets` is reassigned from `taiwan_english` further down
# before this value is ever read, so this load looks like leftover template
# code -- confirm whether it is still needed.
tweets <- read.csv(file = "tweets.csv")

# Parse the `created_at` strings into a UTC date-time column named `timestamp`
taiwan_english[["timestamp"]] <- as.POSIXct(
  taiwan_english[["created_at"]],
  format = "%Y-%m-%d %H:%M:%S",
  tz = "UTC"
)

# Date of Nancy Pelosi's visit
visit_date <- as.Date("2022-08-02")

# Tag every tweet as "Before" or "After" the visit; tweets posted on the
# visit date itself count as "After"
tweets <- mutate(
  taiwan_english,
  date = as.Date(timestamp),
  period = ifelse(date < visit_date, "Before", "After")
)
table(tweets$period)
# Score each tweet's text with syuzhet's get_sentiment(), "bing" method
tweets[["sentiment_score"]] <- get_sentiment(tweets[["text"]], method = "bing")

# Bucket the scores: below zero is Negative, above zero is Positive,
# anything else falls through to Neutral
tweets <- mutate(
  tweets,
  sentiment = case_when(
    sentiment_score < 0 ~ "Negative",
    sentiment_score > 0 ~ "Positive",
    TRUE ~ "Neutral"
  )
)

# Count tweets per sentiment within each period and express every count as a
# percentage of that period's tweets. Keeping the data grouped by `period`
# for the percentage step makes the "Before" and "After" shares comparable
# regardless of how many tweets each period contains (dividing by the grand
# total would mix both periods into one denominator).
sentiment_distribution <- tweets %>%
  group_by(period, sentiment) %>%
  summarise(count = n(), .groups = "drop_last") %>%  # still grouped by period
  mutate(percentage = round((count / sum(count)) * 100, 2)) %>%
  ungroup()

print(sentiment_distribution)  # Display sentiment change before and after the visit

# Draw one random tweet per sentiment category as an illustration
examples <- tweets %>%
  select(sentiment, text) %>%
  group_by(sentiment) %>%
  slice_sample(n = 1)

# Show the sampled tweet for each sentiment
print(examples)

# Split the tweet text into one word per row, drop the words listed in
# `stop_words`, and count how often each remaining word occurs per sentiment.
# The join key is spelled out so the join neither prints a "Joining with"
# message nor silently picks up other shared column names.
word_analysis <- tweets %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word") %>%
  count(sentiment, word, sort = TRUE)

# Keep the 10 most frequent words within each sentiment (ties are kept).
# slice_max() replaces the superseded top_n(); the result is ungrouped so
# later steps do not inherit the grouping by accident.
top_words <- word_analysis %>%
  group_by(sentiment) %>%
  slice_max(n, n = 10) %>%
  ungroup()

# Number of tweets per day for each sentiment category
sentiment_trend <- summarise(
  group_by(tweets, date, sentiment),
  count = n(),
  .groups = "drop"
)

# Line chart of the daily counts, one coloured line per sentiment
ggplot(
  data = sentiment_trend,
  mapping = aes(x = date, y = count, colour = sentiment)
) +
  geom_line(size = 1.2) +
  theme_minimal() +
  labs(
    title = "Sentiment Change Over Time",
    x = "Date",
    y = "Tweet Count"
  )

# Plot top words per sentiment category.
# A plain reorder(word, n) builds a single ordering shared by all facets, so
# a word that appears under several sentiments leaves the bars inside each
# panel out of order. tidytext's reorder_within() orders the words separately
# within each sentiment, and scale_x_reordered() strips the suffix it appends
# to the axis labels.
ggplot(
  top_words,
  aes(x = reorder_within(word, n, sentiment), y = n, fill = sentiment)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free") +
  coord_flip() +
  scale_x_reordered() +
  labs(title = "Most Frequent Words by Sentiment",
       x = "Words", y = "Count") +
  theme_minimal()
##################
# Score every tweet's text with syuzhet's get_nrc_sentiment()
emotions <- get_nrc_sentiment(tweets[["text"]])

# Append the emotion score columns to the right of the tweet data
tweets <- cbind(tweets, emotions)

# Total each emotion score (columns anger through trust) per period, then
# reshape to one row per period and emotion
emotion_summary <- tweets %>%
  group_by(period) %>%
  summarise(across(anger:trust, ~ sum(.x))) %>%
  pivot_longer(cols = -period, names_to = "emotion", values_to = "count")

# Show the per-period emotion totals
print(emotion_summary)

# Daily totals of each emotion score (columns anger through trust), reshaped
# to one row per date and emotion
emotion_trend <- tweets %>%
  group_by(date) %>%
  summarise(across(anger:trust, ~ sum(.x))) %>%
  pivot_longer(cols = -date, names_to = "emotion", values_to = "count")

# Line chart of the daily totals, one coloured line per emotion, with a
# dashed red vertical line at the visit date
ggplot(
  data = emotion_trend,
  mapping = aes(x = date, y = count, colour = emotion)
) +
  geom_line(size = 1.2) +
  geom_vline(
    xintercept = as.numeric(visit_date),
    linetype = "dashed",
    color = "red",
    size = 1
  ) +
  theme_minimal() +
  labs(
    title = "Emotion Trends Before and After Pelosi's Visit",
    subtitle = "Dashed red line marks the visit date",
    x = "Date",
    y = "Emotion Score"
  )
# Return the tweet that scores highest on one emotion column.
#
# Args:
#   emotion_name: Name of an emotion score column, given as a string
#                 (e.g. "anger").
#   tweet_data:   Data frame containing `text`, `period` and the emotion score
#                 column. Defaults to the script-level `tweets`, so existing
#                 one-argument calls keep working.
#
# Returns:
#   A data frame with the columns `text`, `period` and the requested emotion
#   score: a single row, or zero rows when no tweet scores above 0.
get_example_tweet <- function(emotion_name, tweet_data = tweets) {
  tweet_data %>%
    # `.data[[name]]` looks a column up by its string name
    filter(.data[[emotion_name]] > 0) %>%
    arrange(desc(.data[[emotion_name]])) %>%
    slice(1) %>%
    select(text, period, all_of(emotion_name))
}

# Look up the top-scoring tweet for every column of `emotions` and stack the
# results into one table
example_tweets <- bind_rows(lapply(names(emotions), get_example_tweet))

# Show the example tweet found for each emotion
print(example_tweets)
#########################################
library(tidyverse)
library(syuzhet)
library(lubridate)
library(ggplot2)
library(wordcloud)
library(RColorBrewer)
# Overall word frequencies across all tweets: split the text into one word
# per row, drop the words listed in `stop_words`, and count what is left.
# The join key is explicit so the join does not print a "Joining with"
# message or pick up other shared column names.
word_data <- tweets %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)

# Draw a word cloud of the words used in tweets that score above 0 on one
# emotion column.
#
# Args:
#   emotion_name: Name of an emotion score column, given as a string
#                 (e.g. "anger").
#   tweet_data:   Data frame containing `text` and the emotion score column.
#                 Defaults to the script-level `tweets`, so existing
#                 one-argument calls keep working.
#
# Returns:
#   The value of wordcloud(); the function is called for the plot it draws on
#   the active graphics device.
generate_wordcloud <- function(emotion_name, tweet_data = tweets) {
  emotion_words <- tweet_data %>%
    # `.data[[name]]` looks a column up by its string name
    filter(.data[[emotion_name]] > 0) %>%
    unnest_tokens(word, text) %>%
    anti_join(stop_words, by = "word") %>%  # explicit key: no join message
    count(word, sort = TRUE)

  wordcloud(words = emotion_words$word,
            freq = emotion_words$n,
            min.freq = 5,
            max.words = 100,
            colors = brewer.pal(8, "Dark2"),
            random.order = FALSE)
}

# Plot word clouds for selected emotions in a 2x2 grid.
# par() returns the previous settings; they are restored afterwards so the
# grid layout does not leak into plots drawn later in the session.
cloud_titles <- c(
  anger = "Anger Word Cloud",
  fear = "Fear Word Cloud",
  trust = "Trust Word Cloud",
  joy = "Joy Word Cloud"
)
old_par <- par(mfrow = c(2, 2))
for (emotion_name in names(cloud_titles)) {
  generate_wordcloud(emotion_name)
  title(cloud_titles[[emotion_name]])
}
par(old_par)
